---
title: "fwer_excess"
author: "nxu"
date: "1-2-2021"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

rm(list=ls())
library(cherry)
library(hommel)
library(globaltest)
library(ctgt)
###++++++++++=============================================================================================
# fwer control per pathway database
make_error_dagshfl <- function(data, pathinfo, run=2000, seedbase){
  ## generate design matrix
  n = dim(data)[1]
  m = dim(data)[2]-1
  X = data[,1:m]
  ## with same metabolites as ind ata
  xs= colnames(X)
  
  ## store the result
  resdag = matrix(0,length(pathinfo),run)
  ressh = matrix(0,length(pathinfo),run)
  resfl = matrix(0,length(pathinfo),run)
  for(r in 1:run){
    set.seed(seedbase+r)
    y = sample(0:1, n, replace = TRUE)
    
    mygt <- function (hps){
      sqrW = sqrt(mean(y)*(1-mean(y)) ) # sqrt of covariance of y
      WIHZ = sqrW *(sweep(X,2,colMeans(X))) ## W^{1/2}*(I-H)Z
      IHZ = WIHZ/sqrW ##(I-H)Z
      # the full model
      test = sum(colSums(y*IHZ[,hps,drop=F])^2)# full test statistic = y^t (I-H) Z Z^t (I-H) y
      lamt = round(eigen(tcrossprod(WIHZ[,hps,drop=F]),symmetric = T,only.values = T)$values,8)
      pv(test,lamt)
    }
    
    ##
    stc = construct(pathinfo)
    DAG <- DAGmethod(stc, mygt,isadjusted = T)
    dager = !is.na(pvalue(DAG))
    resdag[,r] = dager
    
    ## SH
    SH = tryCatch(structuredHolm(stc, mygt,isadjusted = T), error = function(e) 0) 
    if(class(SH)!="DAG"){
      sher = rep(FALSE, length(pathinfo))
    } else{
      sher = !is.infinite(pvalue(SH)) 
    }
    ressh[,r] = sher
    
    ## FL
    names(pathinfo) = pathinfo
    fc = findFocus(pathinfo, maxsize = 10, atoms = TRUE)
    resf = focusLevel(mygt, pathinfo, focus = fc)
    pf = unlist(resf$focuslevel)
    fler = pf<0.05
    resfl[,r] = fler
  }
  return(list(dag = resdag, sh = ressh, fl = resfl))
}

###++++++++++=============================================================================================
make_error_post <- function(data, pathinfo, run = 2000, seedbase){
  n = dim(data)[1]
  m = dim(data)[2]-1
  X = data[,1:m]
  ## with same metabolites as ind ata
  xs= colnames(X)
  
  ## store the result
  reshommel = c()
  resgt = c()
  resctgt1 = c()
  resctgt2 = c()
  for(r in 1:run){
    set.seed(seedbase+r)
    y = sample(0:1, n, replace = TRUE)
    
    mygt <- function (hps){
      sqrW = sqrt(mean(y)*(1-mean(y)) ) # sqrt of covariance of y
      WIHZ = sqrW *(sweep(X,2,colMeans(X))) ## W^{1/2}*(I-H)Z
      IHZ = WIHZ/sqrW ##(I-H)Z
      # the full model
      test = sum(colSums(y*IHZ[,hps,drop=F])^2)# full test statistic = y^t (I-H) Z Z^t (I-H) y
      lamt = round(eigen(tcrossprod(WIHZ[,hps,drop=F]),symmetric = T,only.values = T)$values,8)
      pv(test,lamt)
    }
    ## gt
    resgt[r] = mygt(xs)<0.05
    
    ## Hommel
    pval = sapply(xs, mygt )
    hom=hommel(pval,simes = T)
    pa = sapply(pathinfo, function(x) localtest(hom, x,tdp=0))
    hmer = sum(which(pa<0.05)) > 0 
    reshommel[r] = hmer
    
    ##ctgt
    #ctgt1 = sapply(pathinfo, function(i) actgt(y,X,xs, as.character(i),maxit= 0)[1])
    ctgt2 = sapply(pathinfo, function(i) actgt(y,X,xs, as.character(i),maxit= 0)[1])
    #resctgt1[r] = sum(ctgt1 == "reject") >0
    resctgt2[r] = sum(ctgt2 == "reject") >0
    
  }
  res = c(mean(resgt), mean(reshommel), mean(resctgt2) )
  names(res) = c("gt", "hommel",  "ctgt500")
  return(list(fwer = res, ctgt = ctgt2 ) )
   
}

```

```{r loaddata,echo=FALSE}

## load data table
load("Taware/d760.RData")
load("Taware/pathwaydatabase760/wikipath_metab_760.RData")
load("Taware/pathwaydatabase760/keggpath_metab_760.RData")
load("Taware/pathwaydatabase760/biocycpath_metab_760.RData")
load("Taware/pathwaydatabase760/smpdbhmdbpath_metab_760.RData")
load("Taware/pathwaydatabase760/proteinhmdbpath_metab_760.RData")
load("Taware/pathwaydatabase760/biofunctionpath_metab_760.RData")

```


```{r runondata, echo=FALSE }

fullset = colnames(d760)[1:(ncol(d760)-1)]

wiki = make_error_dagshfl(data = d760, pathinfo = c(wikipath_uniq ) , seedbase = 760)
smpdb = make_error_dagshfl(data = d760, pathinfo = c(smpdbhmdbpath_metab ),  seedbase = 760)
kegg = make_error_dagshfl(data = d760, pathinfo = c(keggpath_metab ),  seedbase = 760)
biocyc = make_error_dagshfl(data = d760, pathinfo = c(biocycpath_metab ),  seedbase = 760)
protein = make_error_dagshfl(data = d760, pathinfo = c(proteinhmdbpath_metab ),  seedbase = 760)
biofunction = make_error_dagshfl(data = d760, pathinfo = c(biofunctionpath_metab ), seedbase = 760)

RES = rbind( c( sum( colSums(kegg$dag) > 0 ),sum( colSums(kegg$fl) > 0 ) ,sum( colSums(kegg$sh) > 0 ) ) ,
       c( sum( colSums(biocyc$dag) > 0 ),sum( colSums(biocyc$fl) > 0 ) ,sum( colSums(biocyc$sh) > 0 )  ) ,
       c( sum( colSums(smpdb$dag) > 0 ),sum( colSums(smpdb$fl) > 0 ),  sum( colSums(smpdb$sh) > 0 )  )  ,
       c( sum( colSums(biofunction$dag) > 0 ),sum( colSums(biofunction$fl) > 0 ),sum( colSums(biofunction$sh) > 0 )  ) ,
       c( sum( colSums(protein$dag) > 0 ),sum( colSums(protein$fl) > 0 ) ,sum( colSums(protein$sh) > 0 ) ),
       c( sum( colSums(wiki$dag) > 0 ),sum( colSums(wiki$fl) > 0 )   ,sum( colSums(wiki$sh) > 0 ) ) ) /2000
colnames(RES) = c("dag", "fl", "sh")
rownames(RES) = c("kegg", "biocyc", "smpdb", "biofunction", "protein","wiki")

overall = c(sum( colSums(rbind(wiki$dag, kegg$dag, biocyc$dag ,smpdb$dag, biofunction$dag,protein$dag )) > 0 ), #sum( colSums(rbind(wiki$dag, kegg$dag, biocyc$dag ,smpdb$dag,biofunction$dag,protein$dag)) > 0 
sum( colSums(rbind(wiki$fl, kegg$fl, biocyc$fl ,smpdb$fl ,  biofunction$fl,protein$fl)) > 0 ),
sum( colSums(rbind(wiki$sh, kegg$sh, biocyc$sh ,smpdb$sh ,  biofunction$sh,protein$sh)) > 0 ) ) /2000

numberpath = c(length(keggpath_metab), length(biocycpath_metab), length(smpdbhmdbpath_metab), length(biofunctionpath_metab), length(proteinhmdbpath_metab) ,length(wikipath_uniq))
pathInfo = c(numberpath, sum(numberpath))
cbind( pathInfo,  rbind(RES,overall) )

# allpathway = c(wikipath_uniq,keggpath_metab, biocycpath_metab, smpdbhmdbpath_metab, biofunctionpath_metab,proteinhmdbpath_metab)
# allpathway = allpathway[!duplicated(allpathway) ]
# 
# allpath = make_error_post(data = d760, pathinfo = c(allpathway, list(fullset)),run = 2000, seedbase = 760)
# allpath$fwer
#allpath$ctgt

```